# ------------------------------------------------------------- #
# Author: Nicolas Polasek
# Goal: Replicate columns (3) and (4) of Table 9 from 
#       Bustos, Paula, Bruno Caprettini, and Jacopo Ponticelli. 
#       2016. "Agricultural Productivity and Structural 
#       Transformation: Evidence from Brazil." American Economic 
#       Review, 106 (6): 1320-65.
# Date: April 13th, 2023
# Contact: nicolas_polasek@berkeley.edu
# ------------------------------------------------------------- #

# 0 Set environment ---------------------------------------------------------
# Start from an empty global environment. This removes only objects whose
# names do not start with a dot; attached packages and options() are untouched.
rm(list = ls())

# Work from the folder that holds the replication files so that the relative
# data path used further down resolves. To run the script on another machine,
# set the APST_REPLICATION_DIR environment variable to that folder; when it is
# unset or empty, the original author's location is used.
project_dir <- Sys.getenv("APST_REPLICATION_DIR", unset = "")
if (!nzchar(project_dir)) {
  project_dir <- "/Users/nicolaspolasek/Documents/Career/Education/PhD/ Berkeley ARE/Courses/ Spring 2023/ECON 270B/Replication Exercise/Replication data for Bustos et al. 2016/APST_ReplicationFiles"
}
setwd(project_dir)

library(haven) # reads stata files
library(tidyverse) # one world of data management tools
library(data.table) # the other major world of R data management tools
library(kableExtra)
library(readxl)
library(DescTools)
library(kableExtra)
library(stargazer)
library(dyn)
library(lmtest)
library(sandwich)


# 1 Import and clean data -------------------------------------------------------------

## Import data --------------------------------------------------------------
# Read the Stata data set from the current working directory
apst_amc <- haven::read_dta(file = "APST_AMC.dta")

## Clean data --------------------------------------------------------------
# Build a table of 1991 control values, re-labelled as year 2010 / time 6, so
# that it can be merged onto the rows of the analysis data carrying those
# period identifiers.
control_vars <- apst_amc %>%
  # keep the 1991 observations only (the "4th lag" of the controls)
  filter(year == 1991) %>%
  # identifiers first, then the four controls, each renamed with a _1991 suffix
  select(
    year, time, cod_uf, cod_amc, micro, meso,
    rural_adult_1991 = rural_adult,
    log_y_pc_r_1991 = log_y_pc_r,
    log_pop_area_1991 = log_pop_area,
    alpha_adult_1991 = alpha_adult
  ) %>%
  # overwrite the period identifiers with the values used for the merge
  mutate(year = 2010, time = 6)

# Merge lagged variables back with analysis data. control_vars carries
# year = 2010 and time = 6, so only rows of apst_amc with those values can find
# a match; every other row gets NA in the *_1991 columns.
apst_amc1 <- apst_amc %>%
  left_join(control_vars,
            by = c("year", "time", "cod_uf", "cod_amc", "micro", "meso"))

# A left join repeats a row of apst_amc once per matching row of control_vars,
# so a changed row count means the join keys are not unique in control_vars and
# observations would be duplicated in the regressions below.
if (nrow(apst_amc1) != nrow(apst_amc)) {
  stop("Merging the 1991 controls changed the number of rows (",
       nrow(apst_amc), " -> ", nrow(apst_amc1),
       "); the join keys are not unique in control_vars.",
       call. = FALSE)
}


# 2 Regressions presented in Table 9 ----------------------------------------

## Column 3 -----------------------------------------------------------------
# estimates
# Sample: rows with year 2010 and rf == 1. year is compared with a number, as
# in the 1991 filter used to build the controls, instead of the string "2010",
# so the match does not depend on implicit numeric-to-character coercion.
col_3 <- lm(
  formula = dlog_Lm ~ dA_soy + dA_mze +
    rural_adult_1991,
  data = apst_amc1 %>% filter(year == 2010, rf == 1)
)
summary(col_3)

# Heteroscedasticity robust standard errors
# NOTE(review): HC0 is White's estimator without a finite-sample correction;
# Stata's `robust` option corresponds to HC1 -- confirm which one the published
# table uses before comparing standard errors.
coeftest(col_3, vcov = vcovHC(col_3, type = "HC0"))

## Column 4 -----------------------------------------------------------------
# estimates
# Sample: rows with year 2010 and rf == 1. year is compared with a number, as
# in the 1991 filter used to build the controls, instead of the string "2010",
# so the match does not depend on implicit numeric-to-character coercion.
col_4 <- lm(
  formula = dlog_Lm ~ dA_soy + dA_mze +
    rural_adult_1991 + log_y_pc_r_1991 + log_pop_area_1991 +
    alpha_adult_1991,
  data = apst_amc1 %>% filter(year == 2010, rf == 1)
)
summary(col_4)

# Heteroscedasticity robust standard errors
# NOTE(review): HC0 is White's estimator without a finite-sample correction;
# Stata's `robust` option corresponds to HC1 -- confirm which one the published
# table uses before comparing standard errors.
coeftest(col_4, vcov = vcovHC(col_4, type = "HC0"))




